package com.guiji.quartz.formatter;

import cn.hutool.core.util.ReUtil;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import us.codecraft.webmagic.model.formatter.ObjectFormatter;

/**
 * Formatter that reduces an HTML fragment to the plain text of its {@code <p>} paragraphs.
 *
 * <p>Line breaks and {@code <script>} blocks are removed first; then, for every paragraph
 * element, the markup is stripped, {@code &nbsp;} entities become ordinary spaces, and the
 * resulting text is emitted followed by the marker {@code $$$}.
 *
 * @program: cms-vue-plus
 * @description: extracts paragraph text from HTML, each paragraph terminated by "$$$"
 * @author: gaoX
 * @date 2021/12/24 11:20
 */
public class ContentFormatter implements ObjectFormatter<String> {

	/** Marker appended after every extracted paragraph (including the last one). */
	private static final String PARAGRAPH_SEPARATOR = "$$$";

	/** Carriage returns and line feeds; removed so that paragraph text ends up on a single line. */
	private static final Pattern LINE_BREAKS = Pattern.compile("[\\r\\n]");

	/** A complete script element, with or without attributes on the opening tag. */
	private static final Pattern SCRIPT_BLOCK =
			Pattern.compile("<script\\b[^>]*>.*?</script\\s*>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

	/**
	 * A complete paragraph element, with or without attributes on the opening tag.
	 * The opening tag must be exactly "p" followed by whitespace or '>', so tags such as
	 * {@code <pre>} are not mistaken for paragraphs.
	 */
	private static final Pattern PARAGRAPH =
			Pattern.compile("<p(?:\\s[^>]*)?>.*?</p\\s*>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

	/** Any single tag: everything from '<' up to the first following '>'. */
	private static final Pattern TAG = Pattern.compile("<[^>]*>");

	/**
	 * Extracts the text of all paragraphs contained in {@code raw}.
	 *
	 * @param raw the HTML to process; may be {@code null}
	 * @return the text of each paragraph, in document order, each followed by {@code $$$};
	 *         an empty string when {@code raw} is {@code null} or contains no paragraph
	 * @throws Exception declared by the interface; this implementation does not throw it itself
	 */
	@Override
	public String format(String raw) throws Exception {
		if (raw == null) {
			return "";
		}

		// Drop line breaks first, then script blocks, so script source never leaks into the text.
		String html = LINE_BREAKS.matcher(raw).replaceAll("");
		html = SCRIPT_BLOCK.matcher(html).replaceAll("");

		StringBuilder content = new StringBuilder();
		Matcher paragraphs = PARAGRAPH.matcher(html);
		while (paragraphs.find()) {
			// Strip every tag (including the surrounding <p>...</p>) and decode non-breaking spaces.
			String text = TAG.matcher(paragraphs.group()).replaceAll("").replace("&nbsp;", " ");
			content.append(text).append(PARAGRAPH_SEPARATOR);
		}
		return content.toString();
	}

	/**
	 * @return the type this formatter produces, {@code String.class}
	 */
	@Override
	public Class<String> clazz() {
		return String.class;
	}

	/**
	 * This formatter takes no configuration; the argument is ignored.
	 *
	 * @param extra unused
	 */
	@Override
	public void initParam(String[] extra) {
		// Intentionally empty: there is nothing to configure.
	}
}
